In [ ]:
%run "Preparation.ipynb"
In [ ]:
# Small hand-made frame used to try out the column handling in the cells below.
# Scalar values are broadcast to all six rows; only 'E' is given row by row.
sampleIndex = [0, 1, 2, 3, 4, 5]
sampleLabels = ["test", "train"] * 3
df2 = pd.DataFrame(
    data={
        'customData.localplayerguid': 1,
        'playerId': 2,
        'C': 3,
        'D': '4',
        'E': pd.Categorical(sampleLabels),
        'F': 'aaaaaa',
    },
    index=sampleIndex,
)
df2
In [ ]:
# Merge the externally defined minimalInitializationColumns with the column
# names df2 already has; np.unique sorts the names and removes duplicates.
candidateColumns = np.concatenate((minimalInitializationColumns, df2.columns.values))
newColumns = np.unique(candidateColumns)
newColumns
In [ ]:
# Give df2 exactly the columns listed in newColumns, in that order.
# reindex is used rather than .loc[:, newColumns] because newColumns may name
# columns that df2 does not have: recent pandas versions raise a KeyError when
# .loc is given missing labels, whereas reindex creates those columns filled
# with NaN.
df3 = df2.reindex(columns=newColumns)
df3
In [ ]:
# Rename the two raw identifier columns to 'userId' and 'sessionId'.
# index=str additionally converts every row label to its string form.
identifierColumnNames = {
    'customData.localplayerguid': 'userId',
    'playerId': 'sessionId',
}
df4 = df3.rename(index=str, columns=identifierColumnNames)
df4
In [ ]:
# Run the project helper getNormalizedRedMetricsCSV (defined outside this
# cell) on the raw sample frame df2 and display what it returns.
# NOTE(review): presumably the helper performs the same column completion and
# renaming tried by hand in the cells above - confirm against its definition.
df5 = getNormalizedRedMetricsCSV( df2 )
df5
In [ ]:
# Draft body of getAllSessions(_rmDF, dropna), run directly on notebook
# globals so that the intermediate result can be inspected.
#
# Inputs:
#   _rmDF  - frame to read; must contain the columns listed in
#            renamedRelevantColumns (defined outside this cell), including 'type'
#   dropna - when True, rows containing any missing value are removed
# Output:
#   _result - the rows of _rmDF whose 'type' is 'start', restricted to
#             renamedRelevantColumns and without the 'type' column
_rmDF = rmdf152
dropna = False

_result = _rmDF.loc[:, renamedRelevantColumns]
# keep only the 'start' events (presumably one per session - verify)
_result = _result[_result['type'] == 'start']
# axis is passed by keyword: the positional form drop('type', 1) was
# deprecated and is rejected by pandas 2.0 and later
_result = _result.drop('type', axis=1)
if dropna:
    _result = _result.dropna(how='any')
_result
In [ ]:
# Draft body of getTestSessions(_rmDF, _rmTestDF), run on notebook globals.
# It collects the sessions which either:
# - have 'android' or '...editor' as platform
# - are in the RedMetrics test channel
# - belong to a user who has a session of the type above
_rmDF = rmdf152
_rmTestDF = dftest

# user-session association tables, one per source frame
rmDFUsersSessions = getAllSessions(_rmDF, False)
rmTestDFUsersSessions = getAllSessions(_rmTestDF, False)
userSessions = pd.concat([rmDFUsersSessions, rmTestDFUsersSessions])

# sessions played on an editor or android platform
# (platform values carry their own double quotes, hence 'editor"' and '"android"')
platformColumn = _rmDF['customData.platform']
playedInEditor = platformColumn.apply(lambda s: str(s).endswith('editor"'))
playedOnAndroid = platformColumn.isin(['"android"'])
rmDFTestSessions = set(_rmDF[playedInEditor]['sessionId']) | set(_rmDF[playedOnAndroid]['sessionId'])

# every session of the test frame
rmTestDFTestSessions = set(_rmTestDF['sessionId'])

# all the sessions found so far
testSessions = rmDFTestSessions.union(rmTestDFTestSessions)

# users owning at least one of those sessions, plus every user of the test frame
isKnownTestSession = rmDFUsersSessions['sessionId'].isin(testSessions)
rmDFTestUsers = set(rmDFUsersSessions[isKnownTestSession]['userId'].dropna())
rmTestDFTestUsers = set(_rmTestDF['userId'].dropna())
testUsers = rmDFTestUsers.union(rmTestDFTestUsers)

# all the sessions which belong to these users
ownedByTestUser = rmDFUsersSessions['userId'].isin(testUsers)
result = set(rmDFUsersSessions[ownedByTestUser]['sessionId'].dropna())
len(result)
In [ ]:
# Remove every event that belongs to a test session from the 1.52 data.
testSessions = result
belongsToTestSession = rmdf152['sessionId'].isin(testSessions)
filteredrmDF152 = rmdf152[~belongsToTestSession]

# session counts: all sessions / test sessions / remaining sessions
(
    rmdf152['sessionId'].nunique(dropna=False),
    len(testSessions),
    filteredrmDF152['sessionId'].nunique(dropna=False),
)

# number of distinct "linuxplayer" users before and after filtering
linuxEventsBefore = rmdf152[rmdf152['customData.platform'] == '"linuxplayer"']
linuxEventsAfter = filteredrmDF152[filteredrmDF152['customData.platform'] == '"linuxplayer"']
len(linuxEventsBefore['userId'].unique()), len(linuxEventsAfter['userId'].unique())

# Among the remaining "linuxplayer" users, find the one with the most distinct
# sessions; the first user reaching the maximum is kept.
_rmDF = filteredrmDF152
temporaryMax = 0
userIdOfMax = 0
_sessionsList = getAllSessions(_rmDF, True)
linuxUserIds = _rmDF[_rmDF['customData.platform'] == '"linuxplayer"']['userId'].unique()
for userId in linuxUserIds:
    allSessions = _sessionsList[_sessionsList['userId'] == userId].drop_duplicates()
    count = allSessions['sessionId'].nunique()
    if count <= temporaryMax:
        continue
    temporaryMax = count
    userIdOfMax = userId
userIdOfMax, temporaryMax

# platform values present before and after filtering
rmdf152['customData.platform'].dropna().unique()
filteredrmDF152['customData.platform'].dropna().unique()
In [ ]:
# Draft body of
# getTestSessions(_rmDF, _rmTestDF, includeAndroid = True, includeEditor = True, includeTest = True),
# run on notebook globals. Each flag switches one source of test sessions on or off.
_rmDF = rmdf152
_rmTestDF = normalizedRMDFTest
includeAndroid = True
includeEditor = True
includeTest = True

rmDFTestSessions = set()
rmTestDFTestSessions = set()

# sessions which have 'android' or '...editor' as platform
# (platform values carry their own double quotes)
platformColumn = _rmDF['customData.platform']
if includeAndroid:
    playedOnAndroid = platformColumn.isin(['"android"'])
    rmDFTestSessions.update(_rmDF[playedOnAndroid]['sessionId'])
if includeEditor:
    playedInEditor = platformColumn.apply(lambda s: str(s).endswith('editor"'))
    rmDFTestSessions.update(_rmDF[playedInEditor]['sessionId'])

# sessions of the test frame
if includeTest:
    rmTestDFTestSessions = set(_rmTestDF['sessionId'])

# all the sessions above
testSessions = rmDFTestSessions.union(rmTestDFTestSessions)
testSessions
In [ ]:
# Draft body of
# getTestUsersSessions(_rmDF, _rmTestDF, includeAndroid = True, includeEditor = True, includeTest = True),
# run on notebook globals.
# It gets the sessions which either:
# - have 'android' or '...editor' as platform
# - are in the RedMetrics test channel
# - belong to a user who has a session of the type above
# Outputs: testUsers (set of user IDs) and allTestSessions (set of session IDs).
_rmDF = rmdf152
_rmTestDF = normalizedRMDFTest
includeAndroid = True
includeEditor = True
includeTest = True

# tables of association of user-sessions
rmDFUsersSessions = getAllSessions(_rmDF, False)
rmTestDFUsersSessions = getAllSessions(_rmTestDF, False)
userSessions = pd.concat([rmDFUsersSessions, rmTestDFUsersSessions])

# sessions selected by platform and/or by presence in the test frame
testSessions = getTestSessions(
    _rmDF,
    _rmTestDF,
    includeAndroid=includeAndroid,
    includeEditor=includeEditor,
    includeTest=includeTest,
)

# all the users: owners of one of these sessions, plus every user of the test frame
rmDFTestUsers = set(rmDFUsersSessions[rmDFUsersSessions['sessionId'].isin(testSessions)]['userId'].dropna())
rmTestDFTestUsers = set(_rmTestDF['userId'].dropna())
# An empty string is not a real user ID. discard is used instead of remove
# because remove raises KeyError when the test frame has no empty user ID.
rmTestDFTestUsers.discard('')
testUsers = rmDFTestUsers | rmTestDFTestUsers

# all the sessions which belong to these users
allTestSessions = set(rmDFUsersSessions[rmDFUsersSessions['userId'].isin(testUsers)]['sessionId'].dropna())
#(testUsers,allTestSessions)
In [ ]:
# Turn the collected user IDs into a one-column frame, write it to a CSV file
# and read the file back so that the round trip can be checked.
testUsers = pd.DataFrame(data=list(testUsers), columns=['userId'])
testUsers152Path = "".join([dataFolderPath, dataFilesNamesStem, ".1.52-testUsers.csv"])
testUsers.to_csv(testUsers152Path, encoding=csvEncoding)
# to_csv also wrote the row index, so only the 'userId' column is kept on reload;
# dtype=str makes pandas read every value as text
reloadedTestUsers = pd.read_csv(testUsers152Path, dtype=str)
testUsers2 = reloadedTestUsers[['userId']]
In [ ]:
# Compare the in-memory frame with the one read back from the CSV file,
# element by element, then reduce per column: the result holds True for a
# column only if every value in it matches.
(testUsers == testUsers2).all()